D <- read.table("finance_data.csv", header=TRUE, sep=";",
as.is=TRUE)
## Dimensions of D (number of rows and columns)
dim(D)
## [1] 454 2
## Column/variable names
names(D)
## [1] "time" "SLV"
## The first rows/observations
head(D)
## The last rows/observations
tail(D)
## Selected summary statistics
summary(D)
## time SLV
## Length:454 Min. :-0.238893
## Class :character 1st Qu.:-0.026350
## Mode :character Median : 0.002226
## Mean : 0.001468
## 3rd Qu.: 0.033122
## Max. : 0.267308
## Another type of summary of the dataset
str(D)
## 'data.frame': 454 obs. of 2 variables:
## $ time: chr "2006-5-5" "2006-5-12" "2006-5-19" "2006-5-26" ...
## $ SLV : num 0.01376 0.03286 -0.12863 0.00556 -0.04578 ...
D$date <- as.Date(D$time)
D$year <- year(D$date)
ggplot(D, aes(x = date, y = SLV)) + geom_point()
# ggplot(D, aes(x = SLV)) +
# geom_histogram(aes(y = ..density..), color = 'black')
par <- nlminb(start = c(1,1), objective = testDistribution,
distribution = "normal",
x = D$SLV)
ggplot(D)+
geom_histogram(aes(x = SLV, y= ..density..,), color='black') +#color, fill
stat_function(fun = dnorm, n = dim(D)[1], args = list(mean = par$par[1], sd = par$par[2]), color='red')
# plot(ecdf(D$SLV), verticals = T)
# xseq <- seq(0.9*min(D$SLV), 1.1*max(D$SLV), length.out=100)
# lines(xseq, pnorm(xseq, mean(D$SLV), sd(D$SLV)), col='red')
#plot(xseq, pnorm(xseq, mean(D$SLV), sd(D$SLV)), col='red')
qqnorm(D$SLV)
qqline(D$SLV)
lcauchyFUNC <- function(p, data){
x0 <- p[1] #location R
gam <- p[2] #scale R > 0
return(-sum(dcauchy(x = data, location = x0, scale = gam, log = T)))
}
llstFUNC <- function(p, data){ #location-scale t-distribution
return(-sum(dlst(x = data, df = p[1], mu = p[2], sigma = p[3], log = T)))
}
lgnFUNC <- function(p, data){ #symmetric generalized normal dist
return(-sum(dgnorm(x = data, mu = p[1], alpha = p[2], beta = p[3], log = T)))
}
lsnFUNC <- function(p, data){ #skewed normal dist
return(-sum(dsn(x = data, xi = p[1], omega = p[2], alpha = p[3], log = T)))
}
par.cauchy <- nlminb(start = c(0,1), objective = lcauchyFUNC, data = D$SLV)
par.lst <- nlminb(start = c(1,1,1), objective = llstFUNC, data = D$SLV)
par.gn <- nlminb(start = c(1,1,1), objective = lgnFUNC, data = D$SLV)
## Not defined for negative values of alpha and/or beta.
## Not defined for negative values of alpha and/or beta.
## Not defined for negative values of alpha and/or beta.
## Not defined for negative values of alpha and/or beta.
par.sn <- nlminb(start = c(1,1,1), objective = lsnFUNC, data = D$SLV)
ggplot(D)+
geom_histogram(aes(x = SLV, y= ..density..,), color='black') + #color, fill
stat_function(fun = dnorm, n = dim(D)[1], args = list(mean = par$par[1], sd = par$par[2]), aes(colour = "norm")) +
stat_function(fun = dcauchy, n = dim(D)[1], args = list(location = par.cauchy$par[1],
scale = par.cauchy$par[2]), aes(colour = "cauchy")) +
stat_function(fun = dlst, n = dim(D)[1], args = list(df = par.lst$par[1], mu = par.lst$par[2],
sigma = par.lst$par[3]), aes(colour = "lst")) +
stat_function(fun = dgnorm, n = dim(D)[1], args = list(mu = par.gn$par[1], alpha = par.gn$par[2],
beta = par.gn$par[3]), aes(colour = "gnorm")) +
stat_function(fun = dsn, n = dim(D)[1], args = list(xi = par.sn$par[1], omega = par.sn$par[2],
alpha = par.sn$par[3]), aes(colour = "sn")) +
scale_colour_manual(values = c("blue", "red", "yellow", "black", "grey"))+
labs(colour = "Distribution")
AIC.norm <- -2 * sum(dnorm(x = D$SLV, mean = par$par[1], sd = par$par[2], log = T))
+ 2 * length(par$par)
## [1] 4
AIC.cauchy <- -2 * sum(dcauchy(x = D$SLV, location = par.cauchy$par[1],
scale = par.cauchy$par[2], log = T)) + 2 * length(par.cauchy$par)
AIC.lst <- -2 * sum(dlst(x=D$SLV, df = par.lst$par[1], mu = par.lst$par[2], sigma = par.lst$par[3], log = T)) + 2 * length(par.lst$par)
AIC.gn <- -2 * sum(dgnorm(x=D$SLV, mu = par.gn$par[1], alpha = par.gn$par[2], beta = par.gn$par[3],
log = T)) + 2 * length(par.gn$par)
AIC.sn <- -2 * sum(dsn(x=D$SLV, xi = par.sn$par[1], omega = par.sn$par[2], alpha = par.sn$par[3],
log = T)) + 2 * length(par.sn$par)
round(rbind(AIC.norm, AIC.cauchy, AIC.lst, AIC.gn, AIC.sn), digits=5)
## [,1]
## AIC.norm -1464.000
## AIC.cauchy -1363.414
## AIC.lst -1490.234
## AIC.gn -1480.391
## AIC.sn -1458.000
n <- 100000
par(mfrow=c(2,2)) #comparing by means of generating histograms with n = 100000 points for each of the distributions
hist(D$SLV)
hist(rnorm(n, mean = par$par[1], sd = par$par[2]))
hist(rlst(n, df = par.lst$par[1], mu = par.lst$par[2], sigma = par.lst$par[3]))
hist(rgnorm(n, mu = par.gn$par[1], alpha = par.gn$par[2], beta = par.gn$par[3]))
#hist(rsn(n, xi = par.sn$par[1], omega = par.sn$par[2], alpha = par.sn$par[3]))
First we have the profile likelihoods of the two parameters of the normal distribution optimized at the start of project 3. This is to enable comparison with the profile likelihoods of the chosen distribution which is the generalized normal distribution. Likelihood based and Wald CIs are also calculated but are not printed till later.
alpha <- 0.05
c <- exp(-0.5 * qchisq(1-alpha, df = 1))
par(mfrow=c(1,2))
#####Likelihood based CI for location-scale t-distribution
mle.lst <- par.lst$par
#lstFUNC w/ NLL = T, negative log-likelihood using p's
#lstFUNC w/ NLL = F, log-likelihood using p's
lstFUNC <- function(df, mu, sigma, data, log = F){
if(!log){
return(prod(dlst(x = data, df = df, mu = mu, sigma = sigma, log = F) / 2)) # to avoid inf values
} else {
return(sum(dlst(x = data, df = df, mu = mu, sigma = sigma, log = T)))
}
}
CIfun.lst <- function(y, data, p = "df"){##### T from mean, F for sigma
if(p == "df"){
sum(dlst(x = data, df = mle.lst[1], mu = mle.lst[2], sigma = mle.lst[3], log = T)) -
sum(dlst(x = data, df = y, mu = mle.lst[2], sigma = mle.lst[3], log = T)) -
0.5 * qchisq(1-alpha, df = 1)
} else if(p == "mu") {
sum(dlst(x = data, df = mle.lst[1], mu = mle.lst[2], sigma = mle.lst[3], log = T)) -
sum(dlst(x = data, df = mle.lst[1], mu = y, sigma = mle.lst[3], log = T)) -
0.5 * qchisq(1-alpha, df = 1)
} else { #p == "sigma"
sum(dlst(x = data, df = mle.lst[1], mu = mle.lst[2], sigma = mle.lst[3], log = T)) -
sum(dlst(x = data, df = mle.lst[1], mu = mle.lst[2], sigma = y, log = T)) -
0.5 * qchisq(1-alpha, df = 1)
}
}
###PROFILE likelihoods
par(mfrow = c(1,3))
#df
dfs.lst <- seq(mle.lst[1]-2.5, mle.lst[1]+4.25, by = 0.0001)
df.lst <- sapply(X = dfs.lst, FUN = lstFUNC, mu = mle.lst[2], sigma = mle.lst[3], data = D$SLV, log = F)
plot(dfs.lst, df.lst/max(df.lst), col = 1, type = "l", xlab = "df",
main = "Parameter value for df for location-scale t-distribution model of SLV")
CI.df.lst <- c(uniroot(f=CIfun.lst, interval = c(min(dfs.lst), mle.lst[1]), data = D$SLV, p = "df")$root,
uniroot(f=CIfun.lst, interval = c(mle.lst[1], max(dfs.lst)), data = D$SLV, p = "df")$root)
lines(range(dfs.lst), c*c(1,1), col = 2)
#mu
mus.lst <- seq(mle.lst[2]-0.01, mle.lst[2]+0.01, by = 0.00001)
mu.lst <- sapply(X = mus.lst, FUN = lstFUNC, df = mle.lst[1], sigma = mle.lst[3], data = D$SLV, log = F)
plot(mus.lst, mu.lst/max(mu.lst), col = 1, type = "l", xlab = expression(paste(mu)),
main = "Parameter value for location of location-scale t-distribution model of SLV")
CI.mu.lst <- c(uniroot(f = CIfun.lst, interval = c(min(mus.lst), mle.lst[2]), data = D$SLV, p = "mu")$root,
uniroot(f = CIfun.lst, interval = c(mle.lst[2], max(mus.lst)), data = D$SLV, p = "mu")$root)
lines(range(mus.lst), c*c(1,1), col = 2)
#sigma
sigmas.lst <- seq(mle.lst[3]-0.01,mle.lst[3]+0.01, by = 0.0001)
sigma.lst <- sapply(X = sigmas.lst, FUN = lstFUNC, df = mle.lst[1], mu = mle.lst[2], data = D$SLV, log = F)
plot(sigmas.lst, sigma.lst/max(sigma.lst), col = 1, type = "l", xlab = expression(paste(sigma)),
main = "Parameter value for scale for generalized normal model of SLV")
CI.sigma.lst <- c(uniroot(f = CIfun.lst, interval = c(min(sigmas.lst), mle.lst[3]), data = D$SLV, p = "sigma")$root,
uniroot(f = CIfun.lst, interval = c(mle.lst[3], max(sigmas.lst)), data = D$SLV, p = "sigma")$root)
lines(range(sigmas.lst), c*c(1,1), col = 2)
#Wald CIs
n <- dim(D)[1]
H.df.lst <- hessian(lstFUNC, mle.lst[1], mu = mle.lst[2], sigma = mle.lst[3], data = D$SLV, log = T)
V.df.lst <- as.numeric(-1/H.df.lst)
H.mu.lst <- hessian(lstFUNC, mle.lst[2], df = mle.lst[1], sigma = mle.lst[3], data = D$SLV, log = T)
V.mu.lst <- as.numeric(-1/H.mu.lst)
H.sigma.lst <- hessian(lstFUNC, mle.lst[3], df = mle.lst[1], mu = mle.lst[2], data = D$SLV, log = T)
V.sigma.lst <- as.numeric(-1/H.sigma.lst)
wald.df.lst <- mle.lst[1] + c(-1,1) * qnorm(1-alpha/2) * sqrt(V.df.lst)
wald.mu.lst <- mle.lst[2] + c(-1,1) * qnorm(1-alpha/2) * sqrt(V.mu.lst)
wald.sigma.lst <- mle.lst[3] + c(-1,1) * qnorm(1-alpha/2) * sqrt(V.sigma.lst)
round( rbind(CI.df.lst, wald.df.lst, CI.mu.lst, wald.mu.lst, CI.sigma.lst, wald.sigma.lst), digits=5);round( rbind(mle.lst), digits=5)
## [,1] [,2]
## CI.df.lst 4.39309 9.59388
## wald.df.lst 3.85975 8.71193
## CI.mu.lst -0.00157 0.00663
## wald.mu.lst -0.00158 0.00664
## CI.sigma.lst 0.03656 0.04278
## wald.sigma.lst 0.03639 0.04261
## [,1] [,2] [,3]
## mle.lst 6.28584 0.00253 0.0395
temp1 <- paste("df == ", round(mle.lst[1], digits=2))
temp2 <- paste("mu == ", round(mle.lst[2], digits=5))
temp3 <- paste("sigma == ", round(mle.lst[3], digits=4))
ggplot(D)+
geom_histogram(aes(x = SLV, y= ..density..,), color='black') + #color, fill
stat_function(fun = dlst, n = dim(D)[1], args = list(df = par.lst$par[1], mu = par.lst$par[2],
sigma = par.lst$par[3]), color = 'red') +
annotate( "text", x = 2.7/5*max(D$SLV), y = c(10.5, 10.0, 9.5), label = c(temp1,temp2,temp3), parse = T ) +
ggtitle("Location-scale t-distribution and distribution of the weekly returns")
qqlst <- function (y, line = FALSE, ...)
{
y <- y[!is.na(y)]
n <- length(y)
x <- qlst(c(1:n)/(n + 1), df=mle.lst[1])
m <- mean(y)
ylim <- c(min(y), max(y))
qqplot(x, y, xlab = "location-scale t-distribution plotting position", ylim = ylim,
ylab = "Ordered sample", ...)
if (line)
abline(0, m, lty = 2)
invisible()
}
par(mfrow=c(1,2))
qqnorm(D$SLV)
qqline(D$SLV)
qqlst(D$SLV)
qqline(D$SLV)